library(tidyverse)
library(cowplot)
library(ggpubr)
library(pheatmap)
library(RColorBrewer)
library(vegan)
library(reshape2)
library(magrittr)
library(ggplot2)
theme_set(theme_bw())
To enable reproducible study, we provided the raw-data and source codes for analysis and generating figures. Figures 1-6 and Figures S2-S5 were generated by the following R codes.
Raw data were stored in the data folder. It mainly comes from two experiments: one is the BIOLOG standard assay with eco-plate, and the other is the species-specific qPCR assasy. The raw data is provided as in formatted form.
biolog <- read_csv("data/biolog.csv")
qPCR_data <- read_csv(file="data/qPCR.csv")
mono_data <- read_csv("data/mono.csv")
head(biolog)
#> # A tibble: 6 x 5
#> ratio0 plate carbon_id A590 A750
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 equal 1 1 0.191 0.138
#> 2 equal 1 2 0.344 0.181
#> 3 equal 1 3 1.37 0.561
#> 4 equal 1 4 1.25 0.778
#> 5 equal 1 5 0.191 0.137
#> 6 equal 1 6 0.259 0.142
head(qPCR_data)
#> # A tibble: 6 x 6
#> ratio0 plate carbon_id EC PP ratio1
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 less 1 10 177416. 175245243. 0.00101
#> 2 less 1 11 239521. 148368132. 0.00161
#> 3 less 1 12 29837437. 142288704. 0.210
#> 4 less 1 13 645563. 162324668. 0.00398
#> 5 less 1 14 52481. 142197847. 0.000369
#> 6 less 1 15 164023932. 156667034. 1.05
head(mono_data)
#> # A tibble: 6 x 4
#> plate carbon_id EC PP
#> <dbl> <dbl> <dbl> <dbl>
#> 1 1 1 10455221. 198817093.
#> 2 1 2 59042727. 11468217.
#> 3 1 3 54167912. 69902664.
#> 4 1 4 61019235. 97434837.
#> 5 1 5 14130781. 71788638.
#> 6 1 6 5894491. 70394115.
The columns are:
ratio0: initial ratio, indicating the name of cultures. “none”, “less”, “equal”, “more”, “all” represent the P. putida monoculture, 1:1000 (EC/PP, same below), 1:1, 1000:1 cocultrues and and E. coli monoculture, respectively.plate: experiment replicates.A590: the absorbtance in 590 nm, as reported by BIOLOG workstation, a measurment of carbon usage efficiency (CUE) in this study.A750: the absorbtance in 750 nm, as reported by BIOLOG workstation.carbon_id: the id of carbon sources. From 1-72, in which 1 is the negative control. The following variable carbon_name shows the name of each carbon sources.EC: the quantity of E. coli in coculturePP: the quantity of P. putida in coculturecarbon_name <- read_csv("data/carbon.csv")
head(carbon_name)
#> # A tibble: 6 x 2
#> carbon_id carbon_source
#> <dbl> <chr>
#> 1 2 Dextrin
#> 2 3 D-Maltose
#> 3 4 D-Trehalose
#> 4 5 D-Cellobiose
#> 5 6 Gentiobiosse
#> 6 7 Sucrose
The E. coli and P. putida qPCR primers were specific (see Figure S1).
Figure S1. Specificity of species-specific primers. The PCR experiments were performed with E. coli (EC) and P. putida (PP) specific primers and their genomic DNA, respectively.
qpcr quantification
mono_data <- mono_data %>% melt(id.vars=c("plate","carbon_id"),variable.name ="Target.Name",value.name="Quantity_mono")
cocu_data <-qPCR_data %>% select(plate,carbon_id,ratio0,EC,PP) %>% melt(id.vars=c("plate","carbon_id","ratio0"),variable.name ="Target.Name",value.name="Quantity_cocu")
data_all <- merge(mono_data, cocu_data, by = c("carbon_id","Target.Name","plate"),all=T) %>% filter(carbon_id!="1")
A590 was normalized by substrating the value of negative control in each plate.
qPCR_data <- qPCR_data %>%
mutate(ratio0 = factor(ratio0, levels = c("less","equal","more")))
# Normalization
biolog_24h <- biolog %>%
mutate(ratio0 = factor(ratio0, levels = c("none","less","equal","more","all"))) %>%
group_by(plate,ratio0) %>%
mutate(A590=A590-A590[carbon_id==1],A750=A750-A750[carbon_id==1]) %>% # set negative control to zero
filter(carbon_id!=1) %>%
ungroup()
biolog_mono_24h <- biolog_24h %>%
filter(ratio0 %in% c("none","all")) %>%
mutate(species=factor(ratio0,levels = c("all","none"),labels = c("E. coli","P. putida"))) %>%
dplyr::select(-ratio0)
biolog_coculture_24h <- biolog_24h %>%
filter(ratio0 %in% c("less","equal","more")) %>%
mutate(ratio0 = factor(ratio0, levels = c("less","equal","more")))
71 different carbon sources were used in this study. We firstly need to group or cluster them into different sub-groups. In this study, we used two ways in doing this.
Firstly, carbon sources were clustered by the A590 in all the cultures. This is what we called “usage group”. Three usage groups were generated using hclust() method in R, and were named as U1, U2 and U3.
M_A590_24h <- biolog_24h %>% mutate(sample=paste(ratio0,plate,sep="-")) %>%
dplyr::select(sample,carbon_id,A590) %>%
spread(key=sample,value=A590) %>%
as.data.frame() %>%
tibble::column_to_rownames(var="carbon_id")
k3 <- cutree(hclust(dist(M_A590_24h)),k=3)
carbon_group <- data.frame(usage=k3) %>%
rownames_to_column(var="carbon_id") %>%
mutate(carbon_id=as.numeric(carbon_id)) %>%
mutate(usage=paste("U",usage,sep=""))
carbon_name <- left_join(carbon_name, carbon_group)
Secondly, carbon preferences were determined by comparing the A590 values between E. coli and P. putida monocultures.
biolog_mono_A590_24h <- biolog_mono_24h %>%
dplyr::select(plate,carbon_id,species,A590) %>%
spread(species,A590)
PP_prefered <- biolog_mono_A590_24h %>%
group_by(carbon_id) %>%
summarise(p=t.test(`P. putida`,`E. coli`,alternative = "greater")$p.value) %>%
filter(p<0.05)
EC_prefered <- biolog_mono_A590_24h %>%
group_by(carbon_id) %>%
summarise(p=t.test(`P. putida`,`E. coli`,alternative = "less")$p.value) %>%
filter(p<0.05)
carbon_prefer <- data.frame("carbon_id"=carbon_name$carbon_id,
"prefer"="none",
stringsAsFactors = F)
carbon_prefer[carbon_prefer$carbon_id %in% EC_prefered$carbon_id,"prefer"] <- "EC"
carbon_prefer[carbon_prefer$carbon_id %in% PP_prefered$carbon_id,"prefer"] <- "PP"
carbon_name <- left_join(carbon_name, carbon_prefer)
In E. coli preferred carbon sources, the CUE of E. coli is statistically greater than that of P. putida, while in P. putida preferred carbon sources, the CUE of P. putida is statistically greater than that of E. coli.
See below for more information.
Table S1 listed all the 71 carbon sources used in this study.
carbon_name %>%
left_join(carbon_prefer) %>%
DT::datatable(rownames = FALSE)
Table S1. The list of 71 carbon sources used in this study
The social interaction model was shown in Figure 1.
Figure 1. The interaction model used in this study. (A) The phenotype (CUE) of strain A and strain B in monoculture are CUE_A_ and CUEB, respectively. The CUE of two strains in coculture is assumed not equal, and CUEA is higher than CUEB. (B) When strain A and B were co-cultivated, coculture CUE may fail into three aspects. Firstly, if CUE > CUEA, it was interpreted as positive interaction mode (induction) because of the coculture increased total capacity comparing with the best of monoculture. By contrast, if CUE < CUEB, it was interpreted as negative interaction mode (reduction). Besides, if CUE is between CUEA and CUEB, an additional hypothesis test method was applied to reveal interaction mode as described in Methods (C and D). (C) shows an example of positive interaction. The measured coculture CUE has a normal distribution with the mean of μ1, and the relative abundance of strain A and strain B are A% and B%, respectively. Using the monoculture CUEA and CUEB, and their relative abundance, we can get the calculated CUE, which also has a normal distribution with the mean of μ2. If μ1 is significantly greater than μ2, we defined the coculture with a positive mode of interaction. (D) shows an example of negative interaction. Although the measured coculture CUE is equal to that in (C), we get a negative interaction mode as they have different relative abundance of strain A and B.
According to this model, we then get the interaction mode of each culture under a specific carbon source and initial ratio.
ratio1 <- qPCR_data %>% filter(ratio0 %in% c("less","equal","more")) %>%
complete(ratio0,carbon_id,plate) %>%
group_by(ratio0,carbon_id) %>%
dplyr::select(ratio0,plate,carbon_id,ratio1) %>%
mutate(ratio1_mean=mean(ratio1,na.rm = T)) %>%
mutate(ratio1=ifelse(is.na(ratio1),ratio1_mean,ratio1)) %>%
dplyr::select(-ratio1_mean)
mono_A590 <- biolog_mono_24h %>%
group_by(carbon_id,species) %>%
summarise(A590=mean(A590)) %>%
spread(key="species",value="A590")
A590_caculated <- left_join(ratio1,mono_A590) %>% mutate(A590_cac=(`P. putida`+ratio1*`E. coli`)/(1+ratio1))
social <- biolog_coculture_24h %>% dplyr::select(plate,carbon_id,ratio0,A590) %>%
left_join(A590_caculated) %>%
group_by(carbon_id,ratio0) %>%
mutate(p_pos=t.test(x=A590,y=A590_cac,alternative = "greater")$p.value,
p_neg=t.test(x=A590,y=A590_cac,alternative = "less")$p.value) %>%
mutate(social_type=ifelse(
p_pos<0.05,"+",
ifelse(p_neg<0.05,"-","unresolved"))
) %>%
# mutate(social_type=ifelse(social_type=="+" & (mean(A750)>max(mean(`E. coli`),mean(`P. putida`))),"++",social_type)) %>%
# mutate(social_type=ifelse(social_type=="-" & (mean(A750)<min(mean(`E. coli`),mean(`P. putida`))),"--",social_type)) %>%
ungroup() %>%
dplyr::select(carbon_id,ratio0,social_type,p_pos,p_neg) %>%
unique() %>%
mutate(ratio0=factor(ratio0,levels = c("less","equal","more")))
table(social$social_type) %>% barplot(col=c("blue","red","grey"))
Finally, we get 70 negative, 59 positive and 80 unresolved interaction modes.
Social interaction defined based on the absolute density of monoculture and co-culture of two species.
social_qpcr <- data_all %>%
group_by(carbon_id,Target.Name,ratio0) %>%
summarise(p_pos=t.test(x=log10(Quantity_cocu),y=log10(Quantity_mono),alternative = "greater")$p.value,
p_neg=t.test(x=log10(Quantity_cocu),y=log10(Quantity_mono),alternative = "less")$p.value)%>%
mutate(social_type=ifelse(
p_pos<0.05,"+",
ifelse(p_neg<0.05,"-","unresolved"))
) %>% ungroup() %>%
mutate(ratio0=factor(ratio0,levels = c("less","equal","more")))
table(social_qpcr$social_type) %>% barplot(col=c("blue","red","grey"))
We now have multiple parameters, including the initial ratio (ratio0) and the final ratio (ratio1), the populations of E. coli and P. putida in cocultures (EC and PP), the CUE (A590), the preference of carbon sources to E. coli and P. putida, and the social interaction mode (social_type) calculated as described in Figure 1.
We merged all these data into on date frame in R, and run statistical analysis as follows.
merged <- left_join(biolog_coculture_24h,qPCR_data) %>%
left_join(social) %>%
left_join(carbon_name) %>%
filter(!is.na(ratio1))
Total data containing social behavior based on absolute density definition
social_qpcr <-social_qpcr %>% select(social_type,carbon_id,ratio0)
merged_qpcr <- qPCR_data %>%
left_join(social_qpcr) %>%
left_join(carbon_name) %>%
filter(!is.na(ratio1)) %>% mutate(social_type=factor(social_type,levels = c('unresolved','+','-'))) %>% mutate(prefer=factor(prefer,levels = c('none','EC','PP')))
merged_qpcr$EC <- log10(merged_qpcr$EC)
merged_qpcr$PP <- log10(merged_qpcr$PP)
A prerequisite for running several analysis needs a normal distribution of data, therefore, We explored the normality of observed data in merged data. Accroding to the skewness of data value, we transformed the original data with selected methods. After normalization, it can be seen that all key variables approximately fit the normal distributions.
merged <- merged %>% filter(ratio0 %in% c("less","equal","more"))
par(mfrow=c(3,4))
hist(merged$EC)
qqnorm(merged$EC)
hist(log10(merged$EC))
qqnorm(log10(merged$EC))
hist(merged$PP)
qqnorm(merged$PP)
hist(log10(merged$PP))
qqnorm(log10(merged$PP))
hist(merged$ratio1)
qqnorm(merged$ratio1)
hist(log10(merged$ratio1))
qqnorm(log10(merged$ratio1))
Therefore, we applied log transformation to the quantity of E. coli (EC) and P. putida (PP), and sqrt root transformation to A590 values.
merged <- merged %>%
# mutate_at(c("A590"),sqrt) %>%
mutate_at(c("EC","PP","ratio1"),log10)
Furthermore, the reference of group variables were optimized. For example, the initial ratio base level was set to “equal”, i.e. 1:1 of E. coli and P. putida. The base level for social_type is “unresolved”, the base level of usage is “U1”, and the base level of prefer is “none”.
merged$ratio0 <- relevel(merged$ratio0,"equal")
merged$social_type <- as_factor(merged$social_type)
merged$social_type <- relevel(merged$social_type,"unresolved")
merged$usage <- as_factor(merged$usage)
merged$usage <- relevel(merged$usage, "U1")
merged$prefer <- as_factor(merged$prefer)
merged$prefer <- relevel(merged$prefer,"none")
The following diagram shows the correlation of three dependent values, which are the CUE (A590), the quantity of E. coli (EC) and P. putida (PP), and the final ratio (ratio1). CUE is more associated with the quantity of P. putida than the quantity of E. coli, although both associations are significant. In addition, final ratio is positively associated with E. coli quantity and negatively associated with P. putida quantity. Besides, the association between E. coli and P. putida is exist as well.
library(corrplot)
mat <- merged[c("A590","EC","PP","ratio1")]
res <- cor.mtest(mat, conf.level=0.95)
cor <- cor(mat)
corrplot(cor, type = "upper", sig.level = c(.001, .01, .05), pch.cex = .9, insig = "label_sig",
p.mat = res$p,
addCoef.col = "grey80",
diag = FALSE)
We used multiple linear regression to explore the correlations between input and output variables. The input variables include the inital ratio (ratio0), carbon sources, which are further clustered by carbon preference (prefer) or usage (usage). The output variables include the CUE (A590), bacteria quantities of E. coli (EC) and P. putida (PP), and the social interaction mode (social_type).
data <- merged[,c("ratio0","usage","prefer","A590","EC","PP","ratio1","social_type")]
First of all, the carbon usage efficiency (CUE, i.e. A590) was regressed with initial ratio, carbon usage group, carbon preference. The regression model results showed that 68% of the dependent variable variation can be explained based on adjusted R-squared (0.6782, p < 2.2e-16). The factors which significantly influence CUE (A590) are, from strong to weak, carbon usage group (as compared with U1 carbon sources),initial ratio (ratio0, as compared with the 1:1 coculture), interaction type (social_type+, as compared with unresolved interaction mode), and carbon preference (as compared with non-preferred carbon sources)). Besides, when the other parameters were controlled, either increasing the initial ratio from 1:1 to 1000:1 or decreasing the initial ratio from 1:1 to 1:1000, will significantly lower CUE. Summary of the linear model was showed as follows.
model <- lm(A590~ ratio0 + usage + prefer, data = data)
summary(model)
#>
#> Call:
#> lm(formula = A590 ~ ratio0 + usage + prefer, data = data)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.64818 -0.16779 0.00678 0.11771 1.33998
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.21820 0.02503 8.717 < 2e-16 ***
#> ratio0less -0.33896 0.02686 -12.618 < 2e-16 ***
#> ratio0more -0.11341 0.02689 -4.218 2.88e-05 ***
#> usageU2 0.61723 0.03131 19.716 < 2e-16 ***
#> usageU3 0.57853 0.03487 16.589 < 2e-16 ***
#> preferEC 0.07031 0.03078 2.285 0.0227 *
#> preferPP 0.15645 0.03180 4.920 1.14e-06 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.248 on 560 degrees of freedom
#> Multiple R-squared: 0.6816, Adjusted R-squared: 0.6782
#> F-statistic: 199.8 on 6 and 560 DF, p-value: < 2.2e-16
# car::vif(model)
Secondly, the relationship between the final ratio (ratio1) and input variables was investigated. This model explains 64% variances of input variables (adjusted R-squared = 0.6389, p-value < 2.2e-16). And it shows the final ratio is significantly correlated with all the variables. While other parameters are controlled, growing in E. coli prefered carbon source (preferEC) is the most important positive influence fator to final ratio, followed by usage U2 (usageU2) . By contrast, ratio0less ,usageU3,preferPP and and ratio0more are, from strong to weak, the negative influence fators to final ratio.
model <- lm(ratio1 ~ ratio0 + usage + prefer, data = data)
summary(model)
#>
#> Call:
#> lm(formula = ratio1 ~ ratio0 + usage + prefer, data = data)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.82699 -0.32735 -0.00437 0.28300 2.04303
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -1.26415 0.05038 -25.094 < 2e-16 ***
#> ratio0less -1.07556 0.05407 -19.894 < 2e-16 ***
#> ratio0more -0.12509 0.05412 -2.311 0.0212 *
#> usageU2 0.31661 0.06301 5.025 6.78e-07 ***
#> usageU3 -0.37956 0.07019 -5.408 9.47e-08 ***
#> preferEC 0.53330 0.06194 8.610 < 2e-16 ***
#> preferPP -0.16508 0.06400 -2.579 0.0102 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.499 on 560 degrees of freedom
#> Multiple R-squared: 0.6427, Adjusted R-squared: 0.6389
#> F-statistic: 167.9 on 6 and 560 DF, p-value: < 2.2e-16
# car::vif(model)
Likewise, the relationship between the population of P. putida (or E. coli) and input variables was investigated.
As shown below, this model explains 59% of the input variable variations (R-squared = 0.5904, p < 2.2e-16). And we found the population of P. putida in cocultures is, from strong to weak, significantly associated carbon sources and the initial ratio. Notably, carbon usage groups exhibits stronger influence to CUE than perference. Interestingly, more E. coli in initial inoculate (more, 1000:1) is helpful to increase the final population of P. putida compared the 1:1 initial ratio when other parameters were controlled. By contrast, less E. coli in initial inoculate (less, 1:1000) will decrease the final population of P. putida compared the 1:1 initial ratio when other parameters were controlled
model <- lm(PP~ ratio0 + usage + prefer, data = data)
summary(model)
#>
#> Call:
#> lm(formula = PP ~ ratio0 + usage + prefer, data = data)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.70041 -0.12984 0.00254 0.13517 0.68474
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 8.27027 0.02282 362.394 < 2e-16 ***
#> ratio0less -0.06862 0.02449 -2.802 0.00526 **
#> ratio0more 0.05048 0.02452 2.059 0.03995 *
#> usageU2 0.36134 0.02854 12.660 < 2e-16 ***
#> usageU3 0.51540 0.03180 16.210 < 2e-16 ***
#> preferEC -0.02260 0.02806 -0.805 0.42096
#> preferPP 0.14206 0.02899 4.900 1.26e-06 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.2261 on 560 degrees of freedom
#> Multiple R-squared: 0.5947, Adjusted R-squared: 0.5904
#> F-statistic: 137 on 6 and 560 DF, p-value: < 2.2e-16
This model explains 70% of the input variable variations (R-squared = 0.7047, p < 2.2e-16). We found that the population of E. coli is negatively correlated with initial ratio, while positively correlated with carbon usage group U2 (usageU2), preferred carbon sources (preferEC). These results indicated that niche condition is the major challenge to the E. coli population.
model <- lm(EC~ ratio0 + usage + prefer, data = data)
summary(model)
#>
#> Call:
#> lm(formula = EC ~ ratio0 + usage + prefer, data = data)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -1.83058 -0.26505 -0.01877 0.25207 1.67502
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 7.00611 0.04422 158.448 <2e-16 ***
#> ratio0less -1.14418 0.04745 -24.111 <2e-16 ***
#> ratio0more -0.07461 0.04750 -1.571 0.1168
#> usageU2 0.67795 0.05530 12.259 <2e-16 ***
#> usageU3 0.13584 0.06160 2.205 0.0279 *
#> preferEC 0.51070 0.05436 9.394 <2e-16 ***
#> preferPP -0.02301 0.05618 -0.410 0.6822
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.438 on 560 degrees of freedom
#> Multiple R-squared: 0.7078, Adjusted R-squared: 0.7047
#> F-statistic: 226.1 on 6 and 560 DF, p-value: < 2.2e-16
Finally, we used multinomial logistic regression to study the influence factors to social interaction.
model <- nnet::multinom(social_type ~ ratio0 + usage + prefer, data = data)
#> # weights: 24 (14 variable)
#> initial value 622.913168
#> iter 10 value 406.053003
#> iter 20 value 400.407045
#> final value 400.397415
#> converged
summary(model)
#> Call:
#> nnet::multinom(formula = social_type ~ ratio0 + usage + prefer,
#> data = data)
#>
#> Coefficients:
#> (Intercept) ratio0less ratio0more usageU2 usageU3 preferEC preferPP
#> + -0.95563691 -4.610907 -1.730681 2.558865 -0.7874184 3.0731151 1.4921352
#> - -0.06021649 1.118536 -1.511092 -1.567289 0.2229073 -0.4540912 -0.1812898
#>
#> Std. Errors:
#> (Intercept) ratio0less ratio0more usageU2 usageU3 preferEC preferPP
#> + 0.3351764 0.6170528 0.3383025 0.4632859 0.4500019 0.4316674 0.4307093
#> - 0.2545723 0.2911163 0.3208524 0.4530683 0.3646688 0.3775650 0.3361113
#>
#> Residual Deviance: 800.7948
#> AIC: 828.7948
While comparing positive interaction with unresolved interaction, we get a model of:
\(ln(+/unresolved)= -0.96 - 4.61 * ratio0less - 1.73 * ratio0more + 2.56* usageU2 - 0.79 * usageU3 + 3.07 * preferEC + 1.49 * preferPP\)
Of all the input variables, usageU2 (comparing with usageU1) and preferEC (comparing with prefernone) exhibit the greatest influence to positive interaction, and both are significant (see below).
While comparing negative interaction with unresolved interaction, we get a model of:
\(ln(-/unresolved) = -0.06 + 1.12 * ratio0less - 1.51 * ratio0more - 1.57 * usageU2 + 0.22 * usageU3 - 0.45 * preferEC - 0.18 * preferPP\)
From this model, we learn that usageU2 (as compared with U1 carbon sources) is the most important influence factor to negative interaction. Other significant factors are ratio0more, ratio0less and preferEC (see below).
rstatix::tidy(model) %>%
mutate(sig = cut(p.value, breaks = c(0,0.001,0.01,0.05,1),labels = c("***","**", "*", ""), include.lowest = TRUE)) %>%
mutate_at(c("estimate","std.error","statistic"), round, digits = 3) %>%
mutate_at("p.value", formatC, format = "e", digits = 1) %>%
DT::datatable(options = list(pageLength=7))
We used ANOVA tests to reveal the variance of final ratios of three cocultures in every carbon source. Since multiple comparisons were included, the p-values were adjusted using “BH” method.
aov_p <- compare_means(ratio1 ~ ratio0,
group.by = "carbon_id",
data=merged,
method = "anova",
p.adjust.method = "BH") %>%
arrange(p.adj) %>%
mutate(p.adj.signif = cut(p.adj,breaks = c(0,0.01,0.05,1),labels = c("**","*","ns"))) %>%
left_join(carbon_prefer)
The significances of ANOVA tests were visulized in Figure 2A, and five examples of significant and non-significant results were given in Figure 2B & C. Besides, all test results for 71 different carbon sources were supplied in Figure 2S.
p.cutoff <- 0.05
p1 <- ggplot(aov_p,aes(p.adj)) +
# geom_histogram(bins=30) +
geom_line(stat = "density",lwd=1) +
geom_density(lwd=0,color=NA,fill="lightblue") +
geom_vline(xintercept = p.cutoff,lwd=1,lty="dashed",color="firebrick") +
geom_text(x=0.06,y=0,label=p.cutoff,
vjust="top",
hjust="left",
color="firebrick")
p2 <- ggplot(aov_p, aes(p.adj.signif,fill=prefer)) + geom_bar() +
labs(x="significance of p.adj",y="frequency") +
# geom_text(aes(label=Freq),vjust=0,nudge_y = 1) +
scale_fill_discrete(breaks=c("none","EC","PP"),labels=c("none","E. coli","P. putida"),name="Preference") +
theme(legend.text = element_text(face="italic"),
legend.position = c(0.65,0.7))
library(grid)
vp <- viewport(width=0.3,height=0.6,x=0.7,y=0.5)
pushViewport(vp)
print(p1)
print(p2,vp=vp)
Figure 2A. The density of adjusted p-value (ANOVA) in testing whether final ratios of three coculture are different in all carbon sources. X-axis represents the adjusted p-value, and vertical line indicates the position of p-value cutoff (0.05). inset: barplot shows the frequency of significance of adjusted p-value (**, p<0.01, *, p<0.05, ns, p≥0.05). In the barplot, frequencies were colored by carbon preference.
# ggsave("figure 2A.tiff",path="figures")
merged$ratio0 <- relevel(merged$ratio0, "less")
carbon_name_labeller <- function(x){
name_of <- carbon_name$carbon_source
names(name_of) <- carbon_name$carbon_id
return(as.character(name_of[x]))
}
selected_significant_carbon_id <- c(29,32,36,39,46)
selected_nonsignificant_carbon_id <- c(3,4,12,15,53)
p1 <- ggplot(
data=filter(merged,carbon_id %in% selected_significant_carbon_id) %>%
left_join(aov_p),
mapping = aes(ratio0,ratio1,color=prefer))
#> Warning: Column `prefer` joining factor and character vector, coercing into
#> character vector
p2 <- ggplot(
data=filter(merged,carbon_id %in% selected_nonsignificant_carbon_id) %>%
left_join(aov_p),
mapping = aes(ratio0,ratio1,color=prefer))
#> Warning: Column `prefer` joining factor and character vector, coercing into
#> character vector
plots <- lapply(list(p1,p2),function(x){
x + geom_boxplot() + geom_jitter() +
geom_text(aes(x="equal", y=0.15,label= paste("p.adj=",p.adj,sep = "")),check_overlap = T,size=3,show.legend = FALSE) +
geom_text(aes(x="less",y=.65,label=carbon_id),color="grey",size=3) +
facet_wrap(~carbon_id,
ncol=5,
labeller = labeller(carbon_id=carbon_name_labeller)) +
# stat_compare_means(method="aov") +
scale_x_discrete(breaks=c("less","equal","more"),labels=c("1:1000","1:1","1000:1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1,vjust = 1)) +
scale_color_discrete(breaks=c("none","EC","PP"),labels=c("none","E. coli","P. putida"),name="Preference")+
theme(legend.text = element_text(face="italic")) +
labs(x="",y="final ratio (EC/PP)")
})
plot_grid(plotlist = plots,ncol=1,labels=c("B","C"))
Figure 2B,C. Final ratios of cocultures. Examples for significant results (B) or non-significant results (C) of final ratios. The strip label of each subplot (top) indicates carbon source.
# ggsave("figure 2.tiff",path="figures")
The whole picture of final ratios.
ggplot(merged %>% left_join(aov_p),aes(ratio0,ratio1,color=prefer)) + geom_boxplot() +
geom_text(aes(x="less",y=1,label=paste0(carbon_id,"(p=",p.adj,")")),color="grey",vjust="inward",hjust="inward",size=3,show.legend = F) +
# ggpubr::stat_compare_means(method="aov",label="p.signif") +
facet_wrap(~carbon_id) +
# geom_jitter() +
# geom_text(aes(x="equal", y=0.5,label= p.adj),check_overlap = T, data=aov_p,inherit.aes = FALSE,size=3) +
# scale_y_log10(breaks=c(0.001,0.01,0.1,1,10),labels=c("0.001","0.01","0.1","1","10")) +
xlab("") + ylab("final ratio (EC/PP)") +
scale_x_discrete(breaks=c("less","equal","more"),labels=c("1:1000","1:1","1000:1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1,vjust = 1)) +
scale_color_discrete(breaks=c("none","EC","PP"),labels=c("none","E. coli","P. putida"),name="Preference")+
theme(legend.text = element_text(face="italic")) +
theme(axis.text.x = element_text(angle = 90,hjust = 1,vjust = 0.5),
legend.position = "top",
legend.direction = "horizontal",
strip.background = element_blank(), # remove facet label - "strip"
strip.text = element_blank())
#> Warning: Column `prefer` joining factor and character vector, coercing into
#> character vector
# ggsave("figure S2.tiff",path="figures")
# export::graph2ppt(file="figures.pptx",append=TRUE)
plots <- lapply(c("EC","PP"),function(x){
d <- biolog_mono_24h %>%
left_join(carbon_name) %>%
filter(prefer == x)
ggplot(d,aes(carbon_source,A590,fill=species)) +
geom_boxplot() +
labs(y="CUE",x="") +
# coord_flip() +
theme(legend.position = c(0.5,0.9),
legend.direction = "horizontal",
legend.text = element_text(face = "italic"),
legend.title = element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1,vjust = 1))
})
ratio0_labeller <- function(x){
name_of_ratio0 <- c("P. putida","1:1000","1:1","1000:1","E. coli")
names(name_of_ratio0) <- c("none","less","equal","more","all")
return(as.character(name_of_ratio0[x]))
}
p1 <- ggplot(merged,aes(x=prefer,y=ratio1)) +
geom_boxplot() +
facet_wrap(~ratio0,
labeller = labeller(ratio0 = ratio0_labeller)) +
scale_x_discrete(breaks=c("none","EC","PP"),labels=c("none","E. coli","P. putida")) +
theme(axis.text.x = element_text(face = "italic",
angle = 60,
hjust = 1,
vjust = 1)) +
stat_compare_means(method="wilcox.test",comparisons = list(c("EC","none"),c("none","PP"),c("EC","PP")),size=3) +
labs(x="type of carbon perference", y="final ratio (EC/PP)") +
ylim(c(NA,1.5))
prefer_labeller <- function(x){
name_of_prefer <- c("none","E. coli","P. putida")
names(name_of_prefer) <- c("none","EC","PP")
return(as.character(name_of_prefer[x]))
}
p2 <- ggplot(merged,aes(x=ratio0,y=ratio1)) +
geom_boxplot() +
facet_wrap(~prefer, labeller = labeller(prefer = prefer_labeller)) +
theme(strip.text = element_text(face = "italic")) +
stat_compare_means(method="wilcox.test",comparisons = list(c("less","equal"),c("equal","more"),c("less","more")),size=3) +
scale_x_discrete(breaks=c("less","equal","more"),labels=c("1:1000","1:1","1000:1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1,vjust = 1)) +
labs(x="inital ratio (EC/PP)", y="final ratio (EC/PP)") +
ylim(c(NA,1.5))
plot_grid(plot_grid(plots[[1]] + ylim(NA,0.7),
plots[[2]]+ ylim(NA,2),
labels = "AUTO",
rel_widths = c(18,27),
ncol = 2,align = "h"),
plot_grid(p1,p2,labels = c("C","D"),ncol = 2),
ncol = 1,
rel_heights = c(5,4))
Figure 3. carbon preferences.
ggsave("figure 3.tiff",path="figures")
p_cue <- ggplot(biolog_24h,aes(ratio0,A590)) +
geom_boxplot() +
# stat_compare_means(ref.group = ".all.",
# label = "p.format",
# method="wilcox.test") +
geom_hline(aes(yintercept = median(A590)),lty=2,color="firebrick") +
scale_x_discrete(breaks=c("none","less","equal","more","all"),labels=c("P. putida","1:1000","1:1","1000:1","E. coli")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1,vjust = 1)) +
geom_text(aes(ratio0,y,label=y),
inherit.aes = F,
# color="firebrick",
data = biolog_24h %>% group_by(ratio0) %>%
summarise(y=median(A590)),
vjust=-0.3) +
labs(x="initial ratio (EC/PP)",y="CUE")
# add confidential levels in PCA
add_ellipase <- function(p, x="PC1", y="PC2", group="group",
ellipase_pro = 0.95,
linetype="dashed",
colour = "black",
lwd = 1,...){
obs <- p$data[,c(x,y,group)]
colnames(obs) <- c("x", "y", "group")
ellipse_pro <- ellipase_pro
theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
circle <- cbind(cos(theta), sin(theta))
ell <- plyr::ddply(obs, 'group', function(x) {
if(nrow(x) <= 2) {
return(NULL)
}
sigma <- var(cbind(x$x, x$y))
mu <- c(mean(x$x), mean(x$y))
ed <- sqrt(qchisq(ellipse_pro, df = 2))
data.frame(sweep(circle %*% chol(sigma) * ed, 2, mu, FUN = '+'))
})
names(ell)[2:3] <- c('x', 'y')
ell <- plyr::ddply(ell, plyr::.(group) , function(x) x[chull(x$x, x$y), ])
p <- p + geom_polygon(data = ell,
aes(x=x,y=y,group = group),
colour = colour,
linetype=linetype,
lwd =lwd,
inherit.aes = F,
...)
return(p)
}
library(vegan)
pca <- rda(t(M_A590_24h))
percent_var <- pca$CA$eig/pca$tot.chi
df <- scores(pca)$sites %>%
as.data.frame() %>%
tibble::rownames_to_column(var="sample") %>%
separate(sample,c("ratio0","rep"),sep="-",remove = F)
df$ratio0 <- factor(df$ratio0, levels = c("none","less","equal","more","all"),labels = c("P. putida","1:1000","1:1","1000:1","E. coli"))
group <- cutree(hclust(dist(t(M_A590_24h))),k=3)
clustered_group <- as.data.frame(group) %>% tibble::rownames_to_column(var = "sample")
df %<>% left_join(clustered_group)
p <- ggplot(df, aes(PC1,PC2,label=ratio0,color=ratio0))+
geom_point(size=3,show.legend = F) +
scale_color_manual(values = brewer.pal(9,"YlOrRd")[5:9],name="Initial Ratio")+
xlab(paste0("PC1: ", round(percent_var[1] * 100), "% variance")) +
ylab(paste0("PC2: ", round(percent_var[2] * 100), "% variance")) +
annotate("text", x = -.125, y = .75, label = "P. putida",
colour = brewer.pal(9,"YlOrRd")[5], size = 4, fontface = "italic") +
annotate("text", x = -.25, y = .25, label = "1:1000", size = 4,
colour = brewer.pal(9,"YlOrRd")[6]) +
annotate("text", x = -.8, y = -.87, label = "E. coli",
colour = brewer.pal(9,"YlOrRd")[9], size = 4, fontface = "italic") +
annotate("text", x = .75, y = -.2, label = "1:1", size = 4,
colour = brewer.pal(9,"YlOrRd")[7]) +
annotate("text", x = .45, y = -.35, label = "1000:1", size = 4,
colour = brewer.pal(9,"YlOrRd")[8])
p_pca <- add_ellipase(p,alpha=0.1,show.legend = F,lwd=1)
plot_grid(p_cue,p_pca,ncol = 2,labels = c("A","B"),rel_widths = c(7,10))
Figure S3. The carbon usage profiles of two monocultures (E.coli and P. putida) and three cocultures (1:1000, 1:1, and 1000:1). (A) boxplot. Number shows the median of each culture, and horizontal line shows the mean of all cultures. (B) PCA analysis of carbon usage profiles. Ellipses represent the 88% confidence interval of clustering.
ggsave("figure 4.tiff",path="figures")
anno_carbon_group <- carbon_group %>%
left_join(carbon_prefer) %>%
column_to_rownames(var="carbon_id")
colnames(M_A590_24h) <- rep(c("E. coli","1:1","1:1000","1000:1","P. putida"),each=3)
p_heatmap <- pheatmap(t(M_A590_24h),
annotation_col = anno_carbon_group[c(2,1)],
cutree_cols = 3,
# cutree_rows = 3,
fontsize_col = 6,
silent = T)
Figure 5. The CUE (A) and final ratio (B) of cultures with nine U2 carbon sources (from left to right, top to down).
biolog_24h_U2 <- left_join(biolog_24h,carbon_group) %>% filter(usage=="U2")
hsd_group <- lapply(unique(biolog_24h_U2$carbon_id), function(x){
m <- aov(A590~ratio0,data=filter(biolog_24h_U2,carbon_id==x))
library(agricolae)
g <- HSD.test(m,"ratio0",group=TRUE)$groups
d <- rownames_to_column(g,var="ratio0")
d$carbon_id <- x
return(d[-2])
})
hsd_group <- do.call("rbind",hsd_group)
hsd_group$ratio0 <- factor(hsd_group$ratio0,
levels = c("none","less","equal","more","all"))
# add group on top of boxplot
hsd_group <- biolog_24h_U2 %>% group_by(ratio0,carbon_id) %>% summarize(q3=quantile(A590)[3]) %>% left_join(hsd_group)
u2_p1 <- ggplot(biolog_24h_U2, aes(ratio0,A590)) +
geom_boxplot() +
geom_text(aes(x="none",y=max(A590)*1.1,label=carbon_id),color="grey",vjust=1,size=3,show.legend = F) +
geom_text(aes(x=ratio0,y=q3,label=groups),show.legend = F,
data = hsd_group,inherit.aes = F,
vjust=0,nudge_y = .2,hjust=0) +
facet_wrap(~carbon_id,ncol=5,
labeller = labeller(carbon_id=carbon_name_labeller)) +
scale_x_discrete(breaks=c("none","less","equal","more","all"),labels=c("P.putida","1:1000","1:1","1000:1","E.coli")) +
scale_y_continuous(breaks = c(0,1,2)) +
labs(x="",y="CUE") +
# ggpubr::stat_compare_means(method="aov",label="p.format") +
theme(axis.text.x = element_text(angle = 45,hjust = 1,vjust = 1),
legend.position = "top",
legend.direction = "horizontal"
)
plot_grid(ggplotify::as.ggplot(p_heatmap),
u2_p1,
labels = "AUTO",ncol=1)
Figure 4. Initial ratio regulates carbon usage profiles of cocultures. (A) Clustering of carbon sources by usage groups. In the heatmap, type of carbon sources was indicated by bars on the top, carbon id is indicated by the number on the bottom, and experiment replicates were given on the right. Legend colorbar indicates the range of CUE values. (B) The CUE of mono- and cocultures with 14 U2 carbon sources (from left to right, top to down). The x-axis indicates culture conditions, and the y-axis indicates CUEs. ANOVA and Tukey multiple comparisons were performed. Text on boxplot indicates whether significant variances were observed between different cultures.
ggsave("figure 5.tiff",path="figures")
Related to figure 4.
ggplot(biolog_24h, aes(ratio0,A590)) +
geom_boxplot() +
geom_text(aes(x="less",y=max(A590)*1.1,label=carbon_id),
color="grey",
vjust=1,size=3,show.legend = F) +
facet_wrap(~carbon_id,ncol=9) +
scale_x_discrete(breaks=c("none","less","equal","more","all"),labels=c("P. putida","1:1000","1:1","1000:1","E. coli")) + xlab(NULL) +
scale_y_continuous(breaks = c(0,1,2),name = "CUE") +
theme(axis.text.x = element_text(angle = 90,hjust = 1,vjust = 0.5),
legend.position = "top",
legend.direction = "horizontal",
strip.background = element_blank(), # remove facet label - "strip"
strip.text = element_blank())
ggsave("figure S3.tiff",path="figures")
Figure 6. Metabolic coupling in utilizing U2 carbon sources. The E. coli monoculture (A) and P. putida monoculture (B) only have a little efficiency on utilizing U2 carbon sources. In a coculture which has less E. coli population, metabolic coupling is hardly to setup as E. coli is the bottle neck to restrict the flow of metabolite (C). While E. coli has an equivalent population to P. putida, the metabolic flow can be established, and lead to high CUE for U2 carbon sources (D).
# ggplot 中显示回归曲线的公式和R^2值.
source("functions/ggplot_smooth_func.R")
files <- list.files(path="data/BIOLOG/",pattern = "*.csv",full.names = T)
read_BIOLOG <- function(f){
A590 <- read.csv(f,header = F,skip=18,nrows = 8)
A590 <- t(A590[,1:9]) %>% as.matrix() %>% as.vector()
A750 <- read.csv(f,header = F,skip=28,nrows = 8)
A750 <- t(A750[,1:9]) %>% as.matrix() %>% as.vector()
time <- as.numeric(str_extract(str_extract(f,"\\d+h"),"\\d+"))
if (str_detect(f,"\\d++\\.\\d+")) ratio0 <- str_extract(f,"\\d++\\.\\d+")
if (str_detect(f, "(ec|pp)")) ratio0 <- str_extract(f, "(ec|pp)")
if (str_detect(f,"-[1-3]\\D")) plate <- str_extract(str_extract(f,"-[1-3]\\D"),"[1-3]")
data.frame(plate=plate, carbon_id=1:72,time=time,ratio0=ratio0,A590=A590,A750=A750)
}
biolog <- do.call("rbind", lapply(files, read_BIOLOG))
biolog$ratio0 <- factor(biolog$ratio0,
levels = c("pp","1.1000","1.1","1000.1","ec"),
labels = c("P. putida","1:1000","1:1","1000:1","E. coli"))
biolog2 <- biolog %>%
group_by(plate,ratio0,time) %>%
mutate(A590=A590-A590[carbon_id==1],A750=A750-A750[carbon_id==1]) %>% # set negative control to zero
filter(carbon_id!=1) %>%
ungroup()
df_750 <- biolog2 %>% group_by(time,ratio0,carbon_id) %>%
summarise(mean=mean(A750),std=sd(A750))
df_590 <- biolog2 %>% group_by(time,ratio0,carbon_id) %>%
summarise(mean=mean(A590),std=sd(A590))
df_590 <- df_590[!(df_590$time=="28" ),]
df_750 <- df_750[!(df_750$time=="28" ),]
dat <- df_590[853:1207, 1:5] #hour 24
dat$time <- 0; dat$mean <- 0; dat$std <- 0 #change 24h to 0h
dat$ratio0 <- factor(dat$ratio0)
da0_590 <- rbind(df_590, dat)
da0_750 <- rbind(df_750, dat)
rm(dat)
ggplot( da0_750,aes(time,mean,color=ratio0,shape=ratio0,ymin=mean-std,ymax=mean+std)) +
geom_line(size=0.8) +
geom_errorbar(width=0.1) +
geom_point(size=0.8) +
xlab("time(h)") + ylab("Growth(A750)") + geom_text(aes(x=2,y=max(mean)*1.1,label=carbon_id),vjust=1,color="grey",size=4)+
facet_wrap(~carbon_id,ncol=9) + scale_y_continuous(breaks = c(0,0.5,1)) +
scale_x_continuous(breaks = seq(0, 24, by = 8),
labels = seq(0, 24, by = 8),
limits = c(0, 25))+
theme(axis.text = element_text(angle = 0,hjust = 1,vjust = 0.5, size = 10),
axis.title = element_text(size = 15), ##time(h)
legend.text = element_text(size = 15), ##1:1, 1:1000
legend.title = element_text(size = 15), ##ratio0
#legend.key = element_rect(size = 10),
legend.position = "top",
strip.background = element_blank(), # remove facet label - "strip"
strip.text = element_blank())
Figure S6. A750 growth curve.
ggplot( da0_590,aes(time,mean,color=ratio0,shape=ratio0,ymin=mean-std,ymax=mean+std)) +
geom_line(size=0.8) +
geom_errorbar(width=0.1) +
geom_point(size=0.8) +
xlab("time(h)") + ylab("Growth(A590)") + geom_text(aes(x=2,y=max(mean)*1.1,label=carbon_id),vjust=1,color="grey",size=4)+
facet_wrap(~carbon_id,ncol=9) + scale_y_continuous(breaks = c(0,1,2)) +
scale_x_continuous(breaks = seq(0, 24, by = 8),
labels = seq(0, 24, by = 8),
limits = c(0, 25))+
theme(axis.text = element_text(angle = 0,hjust = 1,vjust = 0.5, size = 10),
axis.title = element_text(size = 15), ##time(h)
legend.text = element_text(size = 15), ##1:1, 1:1000
legend.title = element_text(size = 15), ##ratio0
#legend.key = element_rect(size = 10),
legend.position = "top",
strip.background = element_blank(), # remove facet label - "strip"
strip.text = element_blank())
Figure S7. A590 growth curve.
ggplot(biolog, aes(A590,A750,color=ratio0)) +
geom_point(alpha=1/3,show.legend = F) +
scale_color_discrete(name="Initial Ratio") +
facet_wrap(~ratio0) +
geom_smooth(method = "lm",size=1,show.legend = F) +
stat_smooth_func(geom = "text",method = "lm",parse=T, hjust=0,xpos = 0.25,ypos = 1.4, show.legend = F)
Figure S8. The correlation between A590 and A750.
qPCR_data$ratio0 <- factor(qPCR_data$ratio0,
levels = c("less","equal","more"),
labels = c("1:1000","1:1","1000:1"))
qPCR_data$quantity <- qPCR_data$EC +qPCR_data$PP
qPCR <- tidyr::unite(qPCR_data, "ratio0_plate", ratio0, plate)
ggplot(qPCR,aes(ratio0_plate,quantity),color=ratio0_plate) +
geom_violin(aes(fill=factor(ratio0_plate)))+
geom_boxplot(width=0.1,position=position_dodge(0.8))+ #绘制箱线图
theme(axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5))+theme(legend.position="none")
Figure S9. The final number of bacteria in the co-culture systems after 24 hours.
mono_data <- mono_data %>% rename( species = Target.Name, quantity = Quantity_mono) %>% filter(carbon_id!=1) %>%
ungroup()
mono_data$species <-factor(mono_data$species,
levels = c("PP","EC"),
labels = c("P. putida","E. coli"))
d <- merge( mono_data,biolog_mono_24h, by = c("plate", "species","carbon_id"),all=T)
d$logCFU <- log10(d$quantity)
d2<- d[-c(4,74),]
ggplot(d2, aes(logCFU,A590,color=species)) +
geom_point(alpha=1/3,show.legend = F) +facet_wrap(.~species,scales = 'free_x' ) +
geom_smooth(method = "lm",size=1,show.legend = F) +
stat_smooth_func_with_pval(geom = "text",method = "lm",parse=T, hjust=0,xpos=7.0,xpos2 =7.0,ypos=2.0,ypos2 = 1.75, show.legend = F)
#> Warning: Removed 1 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1 rows containing non-finite values (stat_smooth).
#> Warning: Removed 1 rows containing missing values (geom_point).
Figure S10. CUE of single cells of two species.